// Generate dataset containing 1st and 2nd blood pressure readings
// Steps in this file: import the raw CSV export, give the blood-pressure
// columns stable names, clean the medical record numbers and ages, then keep
// only the first two systolic/diastolic readings and their timestamps.

// Start from an empty dataset in memory
clear

// Read the raw export.
// NOTE(review): the "ã" characters in the variable names used below suggest
// the accented column headers were not decoded as intended (possible encoding
// mismatch on import) - confirm. Do not add an encoding() option without also
// updating every variable name below, since the imported names would change.
import delimited "C:\Files\WK\Cardiometabolic Risk Factors\Hypertension Analysis\Metabolic_syndrome_peter-20221130_913.csv"

// Keep the record number, age (edad), localidad, and the contiguous run of
// columns from the first systolic column through v183 (positional range, so it
// depends on the column order of the CSV).
keep oldidentificationnumber edad localidad presiãnarterialsistãlica-v183

// Give the observation-timestamp and encounter-type columns sequential names
// (<stub>1 ... <stub>10). Each local below is named after the target stub and
// holds the source columns for that stub, in reading order.
local SBP_obsDatetime   presiãnarterialsistãlica_obsdate v128 v131 v134 v137 v140 v143 v146 v149 v152
local SBP_encounterType presiãnarterialsistãlica_encount v129 v132 v135 v138 v141 v144 v147 v150 v153
local DBP_obsDatetime   presiãnarterialdiastãlica_obsdat v158 v161 v164 v167 v170 v173 v176 v179 v182
local DBP_encounterType presiãnarterialdiastãlica_encoun v159 v162 v165 v168 v171 v174 v177 v180 v183

foreach stub in SBP_obsDatetime SBP_encounterType DBP_obsDatetime DBP_encounterType {
    // Position of the current column within its list becomes the suffix
    local k = 0
    foreach src of varlist ``stub'' {
        local ++k
        rename `src' `stub'`k'
    }
}

// Name the blood-pressure value columns SBP1..SBP10 and DBP1..DBP10.

// First readings: by this point the only variables still carrying the long
// systolic/diastolic prefixes are the first-reading value columns, so each
// wildcard rename yields SBP1 / DBP1.
rename presiãnarterialsistãlica* SBP#, renumber(1)
rename presiãnarterialdiastãlica* DBP#, renumber(1)

// Repeat readings: SBP2/DBP2 are required further down (keep if / keep), but
// the repeat-reading value columns were imported under generic v# names and
// were never renamed, so those later commands would fail with
// "variable SBP2 not found".
// NOTE(review): assumes the value columns sit immediately before each
// obsDatetime column (v127 before v128, v157 before v158, ... stride of 3),
// as implied by the v# lists used for the timestamp/encounter renames.
// Confirm against the CSV header.
local i = 1
foreach x of varlist v127 v130 v133 v136 v139 v142 v145 v148 v151 {
    local i = `i' + 1
    rename `x' SBP`i'
}

local i = 1
foreach x of varlist v157 v160 v163 v166 v169 v172 v175 v178 v181 {
    local i = `i' + 1
    rename `x' DBP`i'
}



// cleaning
// Goal of this section: end up with a numeric oldidentificationnumber that
// holds only plausible medical record numbers, with known duplicate/empty
// records removed.

// Show duplicated record numbers before any cleaning
duplicates list oldidentificationnumber

// drop if oldidentificationnumber == "2" // empty observation 

// drop observations with weird/corrupted medical record numbers

// generate non_numeric = missing(real(oldidentificationnumber))
// drop if non_numeric > 0

drop if inlist(oldidentificationnumber, "9q337", "A50220")

drop if inlist(oldidentificationnumber, "Culum", "JF61022", "YJ200")

drop if inlist(oldidentificationnumber, "2/9/2018", "5/8/2018", "2/20/2018", "0", "1")

// Same date-like values, with leading blanks as they appear in the raw file
drop if oldidentificationnumber=="  5/8/2018"
drop if oldidentificationnumber=="  2/9/2018"
drop if oldidentificationnumber==" 2/20/2018"

// Flag every remaining ID that does not parse as a number.
// The variable must still be a string here: real() takes a string argument,
// so no destring is attempted before this point (an early destring would
// either do nothing or make the real() call fail with a type mismatch).
gen byte notnumeric = real(oldidentificationnumber)==.

tab notnumeric
list oldidentificationnumber if notnumeric==1

// No observations here with meaningful data that match medical record numbers in step9 of HTN analysis, therefore dropping all (SA, 11/28/2024)
drop if notnumeric == 1

// The IDs "18-08-2017- 01", "2575 62133 1416", "48121..", "76440.." and
// "M9112022" used to be dropped one by one here; they are all non-numeric and
// are therefore already removed by the drop above.

// Convert to numeric now that only number-like strings remain, and stop with
// a clear error if the conversion did not happen (destring leaves the
// variable untouched when it still finds nonnumeric characters).
destring oldidentificationnumber, replace
confirm numeric variable oldidentificationnumber

sort oldidentificationnumber
duplicates list oldidentificationnumber

// oldidentificationnumber is numeric from here on, so the comparisons below
// use numeric literals (9/16, SA)

drop if oldidentificationnumber == 2 & edad == 29

drop if oldidentificationnumber == 9102 & !missing(localidad)

drop if oldidentificationnumber == 7613071 & !missing(localidad)

drop if missing(oldidentificationnumber)

// Age validity check.
// Order by age so the extremes are easy to inspect.
sort edad

// Records with age under 18 or with no age recorded are excluded;
// the upper end of the age range looked believable and is left alone.
drop if edad < 18 | edad == .

// The record number is carried forward under the name id_anterior;
// the helper/demographic columns are no longer needed.
rename oldidentificationnumber id_anterior
drop edad localidad notnumeric

// Visual check: rows that have a timestamp for both the first and the second
// systolic reading
browse if !missing(SBP_obsDatetime1, SBP_obsDatetime2)

// Only records with a second systolic AND a second diastolic blood pressure
// reading are kept for the merge
keep if !missing(SBP2, DBP2)

// Export set: record number plus the first two readings and their timestamps
keep id_anterior ///
     SBP1 SBP_obsDatetime1 SBP2 SBP_obsDatetime2 ///
     DBP1 DBP_obsDatetime1 DBP2 DBP_obsDatetime2

// save "C:\Files\WK\Cardiometabolic Risk Factors\Hypertension Analysis\First_second_BP_readings.dta"